** Data reading and variable selection from raw data
** FRENCH 2003 TRAINING AND PROFESSIONAL QUALIFICATION SURVEY (Formation et Qualification Professionnelle, FQP)


** 01. Reading data **

* Session setup: disable --more-- paging so the do-file runs unattended,
* close any log left open (capture ignores the error when none is open),
* and start from an empty session.
set more off
capture log close
clear all

* Both lines below are placeholders to be completed before running.
cd /*insert your working directory here*/
use /*insert the path to the raw data file here*/

** 02. Constructing year and country variables **

* Survey year and country code are constants for every record in this file
generate year = 2003
generate country = 250    // France: 250 (ISO country codes)

label variable year "survey year"
label variable country "ISO country code"


** 03. ID variables **

* Person identifier: the running observation number of the data as read.
* The alternative based on the raw variable nolog is kept, disabled, for reference:
*ge pid=nolog
* Stored as long rather than the default float: float represents integers
* exactly only up to 16,777,216, so ids beyond that would collide.
generate long pid = _n
label variable pid "person id"


* The raw variables used below must be numeric before they can be
* renamed, labelled and used in arithmetic; convert them in place.
local rawvars s ag nfs diplp diplm dipl cse fi cspp cspm
destring `rawvars', replace


** 04. Basic Demographics (Sex and Age/birth year) **

* Sex: raw variable s, value-labelled 1 = male, 2 = female
rename s sex
label variable sex "sex"
label define sex 1 "male"
label define sex 2 "female", add
label values sex sex

* Age: raw variable ag; birth year is derived as survey year minus age
rename ag age
label variable age "age"
generate birthyr = year - age
label variable birthyr "Year of Birth"

** 05. Siblings **

* Sibling count comes from raw variable nfs: the number of brothers and
* sisters, with the respondent not counted.
rename nfs nsibs
label variable nsibs "number of siblings"


** 06. Own education **

* Respondent's education category comes from raw variable dipl
rename dipl educ_cat
label variable educ_cat "highest level of education completed"


** 07. Parents' education: Father and/or Mother **

* Father's category comes from raw variable diplp, mother's from diplm
rename diplp faeduc_cat
label variable faeduc_cat "father's education level"

rename diplm moeduc_cat
label variable moeduc_cat "mother's education level"

** Label education values (see codebook p.158) **
* One value label, educ_cat, is defined here and attached to the respondent's,
* father's and mother's education category variables.
* Label text is plain ASCII: the label for code 33 previously contained a
* mis-encoded accented character, and codes 40 and 42 contained misspellings.

#delimit ;
lab def educ_cat 
10 "2nd or 3rd university cycle"
11 "grande ecole, diplome d'ingenieur"
30 "1er cycle universitaire"
31 "BTS, DUT"
32 "Paramedical or social with general baccalaureate"
33 "Paramedical or social without general baccalaureate"
40 "General Baccalaureate and diplome technique secondaire"
41 "Baccalaureat general only"
42 "baccalaureat technologique, baccalaureat professionnel et brevet professionnel"
43 "BEI, BEC, BEA"
50 "CAP, BEP and BEPC"
51 "CAP, BEP only"
60 "BEPC only"
70 "CEP"
71 "No diploma" ;
#delimit cr

lab val educ_cat educ_cat
lab val faeduc_cat educ_cat
lab val moeduc_cat educ_cat

** Years of education for respondent, father and mother **
* The same category-to-years mapping is applied to educ_cat, faeduc_cat and
* moeduc_cat in turn. Categories not listed, and missing categories, leave
* the years variable missing.

foreach who in educ faeduc moeduc {
	generate `who'_yrs = .
	replace `who'_yrs = 18   if `who'_cat == 10
	replace `who'_yrs = 17   if `who'_cat == 11
	replace `who'_yrs = 14   if inlist(`who'_cat, 30, 31)
	replace `who'_yrs = 14.5 if `who'_cat == 32
	replace `who'_yrs = 13   if inlist(`who'_cat, 33, 43)
	replace `who'_yrs = 12   if inlist(`who'_cat, 40, 41, 42)
	replace `who'_yrs = 11   if inlist(`who'_cat, 50, 51)
	replace `who'_yrs = 9    if inlist(`who'_cat, 60, 70, 71)
}

label variable educ_yrs   "Years of education - respondent"
label variable faeduc_yrs "Years of education - father"
label variable moeduc_yrs "Years of education - mother"

** 08. Own occupation **

* Occupational category comes from raw variable cse
rename cse occ_code
label variable occ_code "Occupational Category"

* Employment status comes from raw variable fi
rename fi emp_stat
label variable emp_stat "employment status"

* Value labels for employment status (codes 1-8)
label define emp_statL ///
	1 "Exercises a profession" ///
	2 "Unemployed (registered or not with the ANPE)" ///
	3 "Student, pupil, trainee, unpaid internship" ///
	4 "Military contingent" ///
	5 "Retired (former employee) or pre-retired" ///
	6 "Withdrawn from business (former farmer, former craftsman, former merchant)" ///
	7 "Homemaker (including parental leave)" ///
	8 "Other inactive (including persons receiving only a reversion pension and Disabled persons)"

* Value labels for occupational category (codes 1-32)
label define occ_codeL ///
	1 "Unknown" ///
	2 "Smallholder farmer" ///
	3 "Farmer on medium holding" ///
	4 "Farmer on large holding" ///
	5 "Artisan" ///
	6 "Merchant and related" ///
	7 "Entrepreneur with 10 or more employees" ///
	8 "Professional Occupation" ///
	9 "Public Service Framework" ///
	10 "Professor, scientific profession" ///
	11 "Information, Arts and Entertainment Occupations" ///
	12 "Corporate Administrative and Business Framework" ///
	13 "Engineer and technical manager" ///
	14 "School teacher, teacher and related occupations" ///
	15 "Intermediate health and social work worker" ///
	16 "Clergy, Religious" ///
	17 "Administrative administrative intermediary of the civil service" ///
	18 "Administrative and business intermediary business" ///
	19 "Technician" ///
	20 "Foreman, supervisor" ///
	21 "Civilian Employee and Public Service Officer" ///
	22 "Supervisory officer" ///
	23 "Administrative clerk" ///
	24 "Business Employee" ///
	25 "Direct Services Personnel" ///
	26 "Qualified Industrial Worker" ///
	27 "Skilled craftsman" ///
	28 "Driver" ///
	29 "Qualified Worker in Handling, Warehousing and Transportation" ///
	30 "Unskilled Industrial Worker" ///
	31 "Unskilled craftsman" ///
	32 "Agricultural and related worker"

* Attach each label set to its variable
label values emp_stat emp_statL
label values occ_code occ_codeL


** 09. Parents' occupation **

* Father's occupational category comes from raw variable cspp
rename cspp faocc_code
label variable faocc_code "Father's Occupational Category"
label values faocc_code occ_codeL

* Mother's occupational category comes from raw variable cspm
rename cspm moocc_code
label variable moocc_code "Mother's Occupational Category"
label values moocc_code occ_codeL

** 10. Tabulate the Identified Variables **

* Open a plain-text log, overwriting any existing file, to record the
* descriptive checks of the constructed variables. The path is a placeholder.
log using  /*insert the log file path here*/, replace text

** Data reading and variable selection from raw data
** FRENCH 2003 TRAINING AND QUALIFYING SURVEY PROFESSIONAL

* Sex: frequency table
tabulate sex

* Age and birth year: detailed summary statistics
summarize age birthyr, detail

* Number of siblings: detailed summary statistics
summarize nsibs, detail

* Respondent's education: one-way tables of category and years
tab1 educ_cat educ_yrs

* Parents' education: one-way tables of categories and years
tab1 faeduc_cat moeduc_cat faeduc_yrs moeduc_yrs

* Respondent's occupation and employment status
tab1 occ_code emp_stat

* Parents' occupations
tab1 faocc_code moocc_code

log close

** 11. Keep the identified variables only

* Retain only the constructed variables; every other variable is dropped.
#delimit ;
keep
	year country pid
	sex age birthyr nsibs
	educ_cat educ_yrs
	faeduc_cat faeduc_yrs
	moeduc_cat moeduc_yrs
	occ_code emp_stat
	faocc_code moocc_code ;
#delimit cr



** 12. Create ISCED Education Variables **

* ISCED codes for respondent, father and mother.
* The same category-to-code mapping is applied to educ_cat, faeduc_cat and
* moeduc_cat in turn. Categories not listed, and missing categories, leave
* the ISCED variable missing.
foreach who in educ faeduc moeduc {
	generate `who'_ISCED = .
	replace `who'_ISCED = 700 if `who'_cat == 10
	replace `who'_ISCED = 756 if `who'_cat == 11
	replace `who'_ISCED = 554 if inlist(`who'_cat, 30, 31, 32)
	replace `who'_ISCED = 354 if inlist(`who'_cat, 33, 42)
	replace `who'_ISCED = 344 if inlist(`who'_cat, 40, 41)
	replace `who'_ISCED = 353 if inlist(`who'_cat, 43, 50, 51)
	replace `who'_ISCED = 244 if `who'_cat == 60
	replace `who'_ISCED = 200 if inlist(`who'_cat, 70, 71)
}

* Variable labels, so these variables are documented in the saved file
* like every other constructed variable.
label variable educ_ISCED   "ISCED education code - respondent"
label variable faeduc_ISCED "ISCED education code - father"
label variable moeduc_ISCED "ISCED education code - mother"


** 13. Save the Data File **

* Write the data in memory to disk with saveold (an older Stata dataset
* format), overwriting any existing file. The path is a placeholder.
saveold  /*insert the output file path here*/, replace
